{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.feature_selection import (\n",
    "    f_regression,\n",
    "    SelectKBest,\n",
    "    SelectFromModel,\n",
    ")\n",
    "\n",
    "from sklearn.linear_model import Lasso\n",
    "\n",
    "from feature_engine.wrappers import SklearnTransformerWrapper"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Id</th>\n",
       "      <th>MSSubClass</th>\n",
       "      <th>MSZoning</th>\n",
       "      <th>LotFrontage</th>\n",
       "      <th>LotArea</th>\n",
       "      <th>Street</th>\n",
       "      <th>Alley</th>\n",
       "      <th>LotShape</th>\n",
       "      <th>LandContour</th>\n",
       "      <th>Utilities</th>\n",
       "      <th>...</th>\n",
       "      <th>PoolArea</th>\n",
       "      <th>PoolQC</th>\n",
       "      <th>Fence</th>\n",
       "      <th>MiscFeature</th>\n",
       "      <th>MiscVal</th>\n",
       "      <th>MoSold</th>\n",
       "      <th>YrSold</th>\n",
       "      <th>SaleType</th>\n",
       "      <th>SaleCondition</th>\n",
       "      <th>SalePrice</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>60</td>\n",
       "      <td>RL</td>\n",
       "      <td>65.0</td>\n",
       "      <td>8450</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2008</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>208500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>20</td>\n",
       "      <td>RL</td>\n",
       "      <td>80.0</td>\n",
       "      <td>9600</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>2007</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>181500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>60</td>\n",
       "      <td>RL</td>\n",
       "      <td>68.0</td>\n",
       "      <td>11250</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "      <td>2008</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>223500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>70</td>\n",
       "      <td>RL</td>\n",
       "      <td>60.0</td>\n",
       "      <td>9550</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2006</td>\n",
       "      <td>WD</td>\n",
       "      <td>Abnorml</td>\n",
       "      <td>140000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>60</td>\n",
       "      <td>RL</td>\n",
       "      <td>84.0</td>\n",
       "      <td>14260</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>12</td>\n",
       "      <td>2008</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>250000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 81 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   Id  MSSubClass MSZoning  LotFrontage  LotArea Street Alley LotShape  \\\n",
       "0   1          60       RL         65.0     8450   Pave   NaN      Reg   \n",
       "1   2          20       RL         80.0     9600   Pave   NaN      Reg   \n",
       "2   3          60       RL         68.0    11250   Pave   NaN      IR1   \n",
       "3   4          70       RL         60.0     9550   Pave   NaN      IR1   \n",
       "4   5          60       RL         84.0    14260   Pave   NaN      IR1   \n",
       "\n",
       "  LandContour Utilities  ... PoolArea PoolQC Fence MiscFeature MiscVal MoSold  \\\n",
       "0         Lvl    AllPub  ...        0    NaN   NaN         NaN       0      2   \n",
       "1         Lvl    AllPub  ...        0    NaN   NaN         NaN       0      5   \n",
       "2         Lvl    AllPub  ...        0    NaN   NaN         NaN       0      9   \n",
       "3         Lvl    AllPub  ...        0    NaN   NaN         NaN       0      2   \n",
       "4         Lvl    AllPub  ...        0    NaN   NaN         NaN       0     12   \n",
       "\n",
       "  YrSold  SaleType  SaleCondition  SalePrice  \n",
       "0   2008        WD         Normal     208500  \n",
       "1   2007        WD         Normal     181500  \n",
       "2   2008        WD         Normal     223500  \n",
       "3   2006        WD        Abnorml     140000  \n",
       "4   2008        WD         Normal     250000  \n",
       "\n",
       "[5 rows x 81 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# load dataset\n",
    "\n",
    "data = pd.read_csv('houseprice.csv')\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((1022, 79), (438, 79))"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# let's separate into training and testing set\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    data.drop(['Id', 'SalePrice'], axis=1),\n",
    "    data['SalePrice'],\n",
    "    test_size=0.3,\n",
    "    random_state=0,\n",
    ")\n",
    "\n",
    "X_train.shape, X_test.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Select K Best"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['MSSubClass',\n",
       " 'LotFrontage',\n",
       " 'LotArea',\n",
       " 'OverallQual',\n",
       " 'OverallCond',\n",
       " 'YearBuilt',\n",
       " 'YearRemodAdd',\n",
       " 'MasVnrArea',\n",
       " 'BsmtFinSF1',\n",
       " 'BsmtFinSF2',\n",
       " 'BsmtUnfSF',\n",
       " 'TotalBsmtSF',\n",
       " '1stFlrSF',\n",
       " '2ndFlrSF',\n",
       " 'LowQualFinSF',\n",
       " 'GrLivArea',\n",
       " 'BsmtFullBath',\n",
       " 'BsmtHalfBath',\n",
       " 'FullBath',\n",
       " 'HalfBath',\n",
       " 'BedroomAbvGr',\n",
       " 'KitchenAbvGr',\n",
       " 'TotRmsAbvGrd',\n",
       " 'Fireplaces',\n",
       " 'GarageYrBlt',\n",
       " 'GarageCars',\n",
       " 'GarageArea',\n",
       " 'WoodDeckSF',\n",
       " 'OpenPorchSF',\n",
       " 'EnclosedPorch',\n",
       " '3SsnPorch',\n",
       " 'ScreenPorch',\n",
       " 'PoolArea',\n",
       " 'MiscVal',\n",
       " 'MoSold',\n",
       " 'YrSold']"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# variables to evaluate:\n",
    "\n",
    "cols = [var for var in X_train.columns if X_train[var].dtypes !='O']\n",
    "\n",
    "cols"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "SklearnTransformerWrapper(transformer=SelectKBest(k=5,\n",
       "                                                  score_func=<function f_regression at 0x0000007EFF7D7F70>),\n",
       "                          variables=['MSSubClass', 'LotFrontage', 'LotArea',\n",
       "                                     'OverallQual', 'OverallCond', 'YearBuilt',\n",
       "                                     'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1',\n",
       "                                     'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF',\n",
       "                                     '1stFlrSF', '2ndFlrSF', 'LowQualFinSF',\n",
       "                                     'GrLivArea', 'BsmtFullBath',\n",
       "                                     'BsmtHalfBath', 'FullBath', 'HalfBath',\n",
       "                                     'BedroomAbvGr', 'KitchenAbvGr',\n",
       "                                     'TotRmsAbvGrd', 'Fireplaces',\n",
       "                                     'GarageYrBlt', 'GarageCars', 'GarageArea',\n",
       "                                     'WoodDeckSF', 'OpenPorchSF',\n",
       "                                     'EnclosedPorch', ...])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# let's use select K best to select the best k variables\n",
    "\n",
    "selector = SklearnTransformerWrapper(\n",
    "    transformer = SelectKBest(f_regression, k=5),\n",
    "    variables = cols)\n",
    "\n",
    "selector.fit(X_train.fillna(0), y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 3, 11, 15, 25, 26], dtype=int64)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "selector.transformer_.get_support(indices=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['LotArea', 'Neighborhood', 'HouseStyle', 'MasVnrArea', 'ExterQual'], dtype='object')"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# selecteed features\n",
    "\n",
    "X_train.columns[selector.transformer_.get_support(indices=True)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# the transformer returns the selected variables from the list\n",
    "# we passed to the transformer PLUS the remaining variables \n",
    "# in the dataframe that were not examined\n",
    "\n",
    "X_train_t = selector.transform(X_train.fillna(0))\n",
    "X_test_t = selector.transform(X_test.fillna(0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>LotArea</th>\n",
       "      <th>Neighborhood</th>\n",
       "      <th>HouseStyle</th>\n",
       "      <th>MasVnrArea</th>\n",
       "      <th>ExterQual</th>\n",
       "      <th>MSZoning</th>\n",
       "      <th>Street</th>\n",
       "      <th>Alley</th>\n",
       "      <th>LotShape</th>\n",
       "      <th>LandContour</th>\n",
       "      <th>...</th>\n",
       "      <th>GarageType</th>\n",
       "      <th>GarageFinish</th>\n",
       "      <th>GarageQual</th>\n",
       "      <th>GarageCond</th>\n",
       "      <th>PavedDrive</th>\n",
       "      <th>PoolQC</th>\n",
       "      <th>Fence</th>\n",
       "      <th>MiscFeature</th>\n",
       "      <th>SaleType</th>\n",
       "      <th>SaleCondition</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>529</th>\n",
       "      <td>32668</td>\n",
       "      <td>Crawfor</td>\n",
       "      <td>1Story</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Gd</td>\n",
       "      <td>RL</td>\n",
       "      <td>Pave</td>\n",
       "      <td>0</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>...</td>\n",
       "      <td>Attchd</td>\n",
       "      <td>RFn</td>\n",
       "      <td>TA</td>\n",
       "      <td>TA</td>\n",
       "      <td>Y</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>WD</td>\n",
       "      <td>Alloca</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>491</th>\n",
       "      <td>9490</td>\n",
       "      <td>NAmes</td>\n",
       "      <td>1.5Fin</td>\n",
       "      <td>0.0</td>\n",
       "      <td>TA</td>\n",
       "      <td>RL</td>\n",
       "      <td>Pave</td>\n",
       "      <td>0</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>...</td>\n",
       "      <td>Attchd</td>\n",
       "      <td>Unf</td>\n",
       "      <td>TA</td>\n",
       "      <td>TA</td>\n",
       "      <td>Y</td>\n",
       "      <td>0</td>\n",
       "      <td>MnPrv</td>\n",
       "      <td>0</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>459</th>\n",
       "      <td>7015</td>\n",
       "      <td>BrkSide</td>\n",
       "      <td>1.5Fin</td>\n",
       "      <td>161.0</td>\n",
       "      <td>TA</td>\n",
       "      <td>RL</td>\n",
       "      <td>Pave</td>\n",
       "      <td>0</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Bnk</td>\n",
       "      <td>...</td>\n",
       "      <td>Detchd</td>\n",
       "      <td>Unf</td>\n",
       "      <td>TA</td>\n",
       "      <td>TA</td>\n",
       "      <td>Y</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>279</th>\n",
       "      <td>10005</td>\n",
       "      <td>ClearCr</td>\n",
       "      <td>2Story</td>\n",
       "      <td>299.0</td>\n",
       "      <td>TA</td>\n",
       "      <td>RL</td>\n",
       "      <td>Pave</td>\n",
       "      <td>0</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>...</td>\n",
       "      <td>Attchd</td>\n",
       "      <td>Fin</td>\n",
       "      <td>TA</td>\n",
       "      <td>TA</td>\n",
       "      <td>Y</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>655</th>\n",
       "      <td>1680</td>\n",
       "      <td>BrDale</td>\n",
       "      <td>2Story</td>\n",
       "      <td>381.0</td>\n",
       "      <td>TA</td>\n",
       "      <td>RM</td>\n",
       "      <td>Pave</td>\n",
       "      <td>0</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>...</td>\n",
       "      <td>Detchd</td>\n",
       "      <td>Unf</td>\n",
       "      <td>TA</td>\n",
       "      <td>TA</td>\n",
       "      <td>Y</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>WD</td>\n",
       "      <td>Family</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 48 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     LotArea Neighborhood HouseStyle  MasVnrArea ExterQual MSZoning Street  \\\n",
       "529    32668      Crawfor     1Story         0.0        Gd       RL   Pave   \n",
       "491     9490        NAmes     1.5Fin         0.0        TA       RL   Pave   \n",
       "459     7015      BrkSide     1.5Fin       161.0        TA       RL   Pave   \n",
       "279    10005      ClearCr     2Story       299.0        TA       RL   Pave   \n",
       "655     1680       BrDale     2Story       381.0        TA       RM   Pave   \n",
       "\n",
       "    Alley LotShape LandContour  ... GarageType GarageFinish GarageQual  \\\n",
       "529     0      IR1         Lvl  ...     Attchd          RFn         TA   \n",
       "491     0      Reg         Lvl  ...     Attchd          Unf         TA   \n",
       "459     0      IR1         Bnk  ...     Detchd          Unf         TA   \n",
       "279     0      Reg         Lvl  ...     Attchd          Fin         TA   \n",
       "655     0      Reg         Lvl  ...     Detchd          Unf         TA   \n",
       "\n",
       "    GarageCond PavedDrive PoolQC  Fence MiscFeature SaleType SaleCondition  \n",
       "529         TA          Y      0      0           0       WD        Alloca  \n",
       "491         TA          Y      0  MnPrv           0       WD        Normal  \n",
       "459         TA          Y      0      0           0       WD        Normal  \n",
       "279         TA          Y      0      0           0       WD        Normal  \n",
       "655         TA          Y      0      0           0       WD        Family  \n",
       "\n",
       "[5 rows x 48 columns]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_test_t.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## SelectFromModel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "SklearnTransformerWrapper(transformer=SelectFromModel(estimator=Lasso(alpha=10000,\n",
       "                                                                      random_state=0)),\n",
       "                          variables=['MSSubClass', 'LotFrontage', 'LotArea',\n",
       "                                     'OverallQual', 'OverallCond', 'YearBuilt',\n",
       "                                     'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1',\n",
       "                                     'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF',\n",
       "                                     '1stFlrSF', '2ndFlrSF', 'LowQualFinSF',\n",
       "                                     'GrLivArea', 'BsmtFullBath',\n",
       "                                     'BsmtHalfBath', 'FullBath', 'HalfBath',\n",
       "                                     'BedroomAbvGr', 'KitchenAbvGr',\n",
       "                                     'TotRmsAbvGrd', 'Fireplaces',\n",
       "                                     'GarageYrBlt', 'GarageCars', 'GarageArea',\n",
       "                                     'WoodDeckSF', 'OpenPorchSF',\n",
       "                                     'EnclosedPorch', ...])"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# let's select the best variables according to Lasso\n",
    "\n",
    "lasso = Lasso(alpha=10000, random_state=0)\n",
    "\n",
    "sfm = SelectFromModel(lasso, prefit=False)\n",
    "\n",
    "selector = SklearnTransformerWrapper(\n",
    "    transformer = sfm,\n",
    "    variables = cols)\n",
    "\n",
    "selector.fit(X_train.fillna(0), y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 0,  1,  2,  3,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 24, 26,\n",
       "       27, 28, 29, 30, 31, 32, 33], dtype=int64)"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "selector.transformer_.get_support(indices=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "24"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(selector.transformer_.get_support(indices=True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "36"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(cols)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# the transformer returns the selected variables from the list\n",
    "# we passed to the transformer PLUS the remaining variables \n",
    "# in the dataframe that were not examined\n",
    "\n",
    "X_train_t = selector.transform(X_train.fillna(0))\n",
    "X_test_t = selector.transform(X_test.fillna(0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>MSSubClass</th>\n",
       "      <th>MSZoning</th>\n",
       "      <th>LotFrontage</th>\n",
       "      <th>LotArea</th>\n",
       "      <th>Alley</th>\n",
       "      <th>LotShape</th>\n",
       "      <th>LandContour</th>\n",
       "      <th>Utilities</th>\n",
       "      <th>LotConfig</th>\n",
       "      <th>LandSlope</th>\n",
       "      <th>...</th>\n",
       "      <th>GarageType</th>\n",
       "      <th>GarageFinish</th>\n",
       "      <th>GarageQual</th>\n",
       "      <th>GarageCond</th>\n",
       "      <th>PavedDrive</th>\n",
       "      <th>PoolQC</th>\n",
       "      <th>Fence</th>\n",
       "      <th>MiscFeature</th>\n",
       "      <th>SaleType</th>\n",
       "      <th>SaleCondition</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>529</th>\n",
       "      <td>20</td>\n",
       "      <td>RL</td>\n",
       "      <td>0.0</td>\n",
       "      <td>32668</td>\n",
       "      <td>0</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>CulDSac</td>\n",
       "      <td>Gtl</td>\n",
       "      <td>...</td>\n",
       "      <td>Attchd</td>\n",
       "      <td>RFn</td>\n",
       "      <td>TA</td>\n",
       "      <td>TA</td>\n",
       "      <td>Y</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>WD</td>\n",
       "      <td>Alloca</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>491</th>\n",
       "      <td>50</td>\n",
       "      <td>RL</td>\n",
       "      <td>79.0</td>\n",
       "      <td>9490</td>\n",
       "      <td>0</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>Inside</td>\n",
       "      <td>Gtl</td>\n",
       "      <td>...</td>\n",
       "      <td>Attchd</td>\n",
       "      <td>Unf</td>\n",
       "      <td>TA</td>\n",
       "      <td>TA</td>\n",
       "      <td>Y</td>\n",
       "      <td>0</td>\n",
       "      <td>MnPrv</td>\n",
       "      <td>0</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>459</th>\n",
       "      <td>50</td>\n",
       "      <td>RL</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7015</td>\n",
       "      <td>0</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Bnk</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>Corner</td>\n",
       "      <td>Gtl</td>\n",
       "      <td>...</td>\n",
       "      <td>Detchd</td>\n",
       "      <td>Unf</td>\n",
       "      <td>TA</td>\n",
       "      <td>TA</td>\n",
       "      <td>Y</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>279</th>\n",
       "      <td>60</td>\n",
       "      <td>RL</td>\n",
       "      <td>83.0</td>\n",
       "      <td>10005</td>\n",
       "      <td>0</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>Inside</td>\n",
       "      <td>Gtl</td>\n",
       "      <td>...</td>\n",
       "      <td>Attchd</td>\n",
       "      <td>Fin</td>\n",
       "      <td>TA</td>\n",
       "      <td>TA</td>\n",
       "      <td>Y</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>655</th>\n",
       "      <td>160</td>\n",
       "      <td>RM</td>\n",
       "      <td>21.0</td>\n",
       "      <td>1680</td>\n",
       "      <td>0</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>Inside</td>\n",
       "      <td>Gtl</td>\n",
       "      <td>...</td>\n",
       "      <td>Detchd</td>\n",
       "      <td>Unf</td>\n",
       "      <td>TA</td>\n",
       "      <td>TA</td>\n",
       "      <td>Y</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>WD</td>\n",
       "      <td>Family</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 67 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     MSSubClass MSZoning  LotFrontage  LotArea Alley LotShape LandContour  \\\n",
       "529          20       RL          0.0    32668     0      IR1         Lvl   \n",
       "491          50       RL         79.0     9490     0      Reg         Lvl   \n",
       "459          50       RL          0.0     7015     0      IR1         Bnk   \n",
       "279          60       RL         83.0    10005     0      Reg         Lvl   \n",
       "655         160       RM         21.0     1680     0      Reg         Lvl   \n",
       "\n",
       "    Utilities LotConfig LandSlope  ... GarageType GarageFinish GarageQual  \\\n",
       "529    AllPub   CulDSac       Gtl  ...     Attchd          RFn         TA   \n",
       "491    AllPub    Inside       Gtl  ...     Attchd          Unf         TA   \n",
       "459    AllPub    Corner       Gtl  ...     Detchd          Unf         TA   \n",
       "279    AllPub    Inside       Gtl  ...     Attchd          Fin         TA   \n",
       "655    AllPub    Inside       Gtl  ...     Detchd          Unf         TA   \n",
       "\n",
       "    GarageCond PavedDrive PoolQC  Fence MiscFeature SaleType SaleCondition  \n",
       "529         TA          Y      0      0           0       WD        Alloca  \n",
       "491         TA          Y      0  MnPrv           0       WD        Normal  \n",
       "459         TA          Y      0      0           0       WD        Normal  \n",
       "279         TA          Y      0      0           0       WD        Normal  \n",
       "655         TA          Y      0      0           0       WD        Family  \n",
       "\n",
       "[5 rows x 67 columns]"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_test_t.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "fenotebook",
   "language": "python",
   "name": "fenotebook"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.2"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
